Show the code
if (!"librarian" %in% installed.packages())
install.packages("librarian")
librarian::shelf(ggplot2, zmeers / ggparliament, tidyr, viridis, patchwork, dplyr, readxl, plotly, rworldmap)Maelle Boutraud
July 1, 2024
Here is my project of the 30 Day Chart Challenge using R while trying to bring good data and understand better about visualization and the different graphs you can use… Come learn with me and watch my evolution creating graphs !
library(ggparliament)
library(tidyverse)
seats <- c(51, 188, 52, 68, 219, 73, 42, 35, 23)
names_party <- c("The European United Left/Nordic Green Left",
"Progressive Alliance of Socialists and Democrats",
"Greens/European Free Alliance",
"Alliance of Liberals and Democrats for Europe",
"The European Peoples Party",
"European Conservatives and Reformists",
"Europe of Freedom and Direct Democracy",
"Europe of Nations and Freedom",
"Non-attached members")
# Colours of the different party using https://htmlcolorcodes.com/fr/
party_colours <- c("#C0392B", "#E74C3C", "#52BE80", "#F4D03F", "#21618C", "#3498DB", "#984EA3", "#707B7C", "#999999")
# Abbreviated column for the party
party <- c("GUE/NGL", "S&D", "Greens/EFA", "ALDE", "EPP", "ECR", "EFDD", "ENF", "NA")
# Creation of the data frame
europeen <- data.frame(
party = party,
seats = seats,
mycol = party_colours
)
# Creation of data for the semicircle plot
europeen_semicircle <- parliament_data(election_data = europeen, type = "semicircle", parl_rows = 14, party_seats = europeen$seats)
# Creation of plot
ggplot(europeen_semicircle, aes(x = x, y = y, colour = party)) +
geom_parliament_seats(size = 2.5) +
theme_ggparliament() +
labs(title = "Europeen Parlement, 2024") +
scale_colour_manual(values = party_colours,
limits = party)library(ggplot2)
Naissance <- data.frame(
Country = c("European Union", "Austria", "Belgium", "Bulgaria", "Croatia",
"Cyprus", "Czech Republic", "Denmark","Estonia", "Finland",
"France", "Germany", "Greece", "Hungary", "Ireland", "Italy",
"Latvia", "Lithuania", "Luxembourg", "Netherlands", "Poland",
"Portugal", "Romania", "Slovakia", "Slovenia", "Spain",
"Sweden", "Norway", "Russia", "Switzerland", "United Kingdom",
"Australia", "Canada", "Chile", "Colombia",
"Israel", "Japan", "Mexico", "New-Zealand", "South Korea",
"Turkey", "United States"),
Birth_rate_2022 = c(8.7, 9.1, 9.8, 8.8, 8.8, 11.1, 9.5, 9.9, 8.6, 8.1, 10.6,
8.8, 7.3, 9.3, 11.2, 6.7, 8.5, 7.8, 9.9,
9.5, 8.3, 8.0, 9.6, 9.7, 8.3, 6.9, 10.0, 9.4, 9.6, 9.4,
10.7, 11.5, 9.8, 11.8, 14.2, 19.6, 6.6,
14.9, 12.4, 5.6, 14.7, 11.1)
)
# Creation of plot
ggplot(Naissance, aes(x = reorder(Country, Birth_rate_2022), y = Birth_rate_2022, fill = Birth_rate_2022)) +
geom_bar(stat = "identity") +
coord_flip() +
scale_fill_gradient(low = "skyblue", high = "darkblue") +
labs(title = "Birth rate by Country in 2022", x = "Country", y = "Birth rate") +
theme_minimal()First plot for makeover
library(ggplot2)
library(dplyr)
library(tidyr)
GHG <- data.frame( Sectors = c("Energy industry", "Manufacturing industry",
"Waste treatment", "Residential activities",
"Agriculture", "Road transport", "Other transport", "Total"),
`2017` = c(57.3, 82.9, 15.1, 84.1, 81.3, 130, 8, 459),
`2018` = c(48, 82.8, 14.8, 79, 80.4, 127, 8.1, 440),
`2019` = c(46.2, 79.8, 16, 76, 78.6, 126, 8.2, 431),
`2020` = c(41.3, 72, 15.9, 71.3, 78.2, 107, 6.2, 392),
`2021` = c(42.5, 78, 15.2, 75.1, 76.5, 120, 7.1, 415),
`2022` = c(44.6, 73, 15.2, 64, 76.5, 122, 8.1, 404))
GHG_long <- GHG %>%
pivot_longer(
cols = starts_with("X"),
names_to = "Years",
values_to = "Emissions"
)
neon_colors <- c("#AEC6CF", "#FFB347", "#77DD77", "#FF6961", "#CFCFC4", "#F49AC2", "#FFD1DC")
# Creation of plot
ggplot(GHG_long, aes(x = Emissions, y = reorder(Sectors, Emissions), fill = Years)) +
geom_bar(stat = "identity", position = "dodge") +
scale_fill_manual(values = neon_colors, name = "Years") +
theme_minimal() +
labs(
title = "Greenhouse gas emissions by sectors",
subtitle = "(In France)",
x = "Emissions (Mt CO2 equivalent)",
y = "Sectors/ Activity"
)Second Plot - using stacked bars
# Load required packages
library(ggplot2)
library(dplyr)
library(tidyr)
# Data definition
GHG <- data.frame(
Sectors = c("Energy industry", "Manufacturing industry",
"Waste treatment", "Residential activities",
"Agriculture", "Other transport", "Road transport"),
`2017` = c(57.3, 82.9, 15.1, 84.1, 81.3, 8, 130),
`2018` = c(48, 82.8, 14.8, 79, 80.4, 8.1, 127),
`2019` = c(46.2, 79.8, 16, 76, 78.6, 8.2, 126),
`2020` = c(41.3, 72, 15.9, 71.3, 78.2, 6.2, 107),
`2021` = c(42.5, 78, 15.2, 75.1, 76.5, 7.1, 120),
`2022` = c(44.6, 73, 15.2, 64, 76.5, 8.1, 122)
)
# Pivoter les données pour avoir une colonne Annee, Secteur, et Emissions
GHG_long <- GHG %>%
pivot_longer(cols = -Sectors, names_to = "Years", values_to = "Emissions")
# Personalized colours for each activity or sector
sector_colors <- c("#AEC6CF", "#FFB347", "#77DD77", "#FF6961", "#CFCFC4", "#F49AC2", "#FFD1DC")
# Creation of plot
ggplot(GHG_long, aes(x = Years, y = Emissions, fill = Sectors)) +
geom_bar(stat = "identity", position = "stack") +
scale_fill_manual(values = sector_colors) +
labs(
title = "Greenhouse gas emissions by sectors",
x = "Years",
y = "Emissions (Mt CO2 equivalent)") +
theme_minimal()This shows the importance of choosing the right graph for certain data for better visualization.
library(waffle)
library(ggplot2)
Energie <- c("Renewable", "Hydropower", "Nuclear", "Natural Gas", "Coal", "Oil")
stats <- c(10.8, 15.7, 10.7, 23.5, 36.7, 2.8)
yey <- data.frame(Energie = Energie, stats = stats)
colors <- c("#66c2a5", "#fc8d62", "#8da0cb", "#e78ac3", "#a6d854", "#ffd92f")
parts <- setNames(yey$stats, yey$Energie)
waffle_chart <- waffle(
parts = parts,
rows = 10,
size = 0.5,
colors = colors
) +
ggtitle("Energy used to generate electricity worldwide") +
labs(caption = "Source: Hello Watt") +
theme(
plot.title = element_text(size = 12, face = "bold", hjust = 0.6),
legend.title = element_blank(),
legend.text = element_text(size = 12),
legend.position = "bottom",
plot.caption = element_text(size = 10, face = "italic"),
plot.title.position = "plot",
plot.margin = margin(10, 10, 10, 10)
)
print(waffle_chart)For the time being, this is from a GitHub example using the networkD3 package. Reference
This graph shows the migration from one country (left) to another (right).
# Libraries
library(tidyverse)
library(viridis)
library(patchwork)
library(hrbrthemes)
library(circlize)
# Load dataset from github,
# let's give it a better name than data.
immidata <- read.table("https://raw.githubusercontent.com/holtzy/data_to_viz/master/Example_dataset/13_AdjacencyDirectedWeighted.csv", header=TRUE)
# Package we need here
library(networkD3)
# We need a long format
immidata_long <- immidata %>%
rownames_to_column %>%
gather(key = 'key', value = 'value', -rowname) %>%
filter(value > 0)
colnames(immidata_long) <- c("source", "target", "value")
immidata_long$target <- paste(immidata_long$target, " ", sep="")
# From these flows we need to create a node data frame: it lists every entities involved in the flow
nodes <- data.frame(name=c(as.character(immidata_long$source), as.character(immidata_long$target)) %>% unique())
# With networkD3, connection must be provided using id, not using real name like in the links dataframe.. So we need to reformat it.
immidata_long$IDsource=match(immidata_long$source, nodes$name)-1
immidata_long$IDtarget=match(immidata_long$target, nodes$name)-1
# prepare colour scale
ColourScal ='d3.scaleOrdinal() .range(["#FDE725FF","#B4DE2CFF","#6DCD59FF","#35B779FF","#1F9E89FF","#26828EFF","#31688EFF","#3E4A89FF","#482878FF","#440154FF"])'
# Make the Network
sankeyNetwork(Links = immidata_long, Nodes = nodes,
Source = "IDsource", Target = "IDtarget",
Value = "value", NodeID = "name",
sinksRight=FALSE, colourScale=ColourScal, nodeWidth=40, fontSize=13, nodePadding=20)library(readxl)
library(ggplot2)
library(tidyverse)
library(plotly)
OECD <- read_excel("../Rwork/30DayChartChallenge/OECDmigration.xlsx")
# Renommer la colonne et filtrer les lignes non nulles
migration_data <- rename(OECD, Country = 1)
migration_data <- filter(migration_data, !is.na(Country))
# Convertir les données en format long
migration_long <- pivot_longer(migration_data, -Country, names_to = "Year", values_to = "Value")
# Convertir les colonnes Year et Value en numériques
migration_long$Year <- as.numeric(migration_long$Year)
migration_long$Value <- as.numeric(gsub(",", "", migration_long$Value))
# Créer le graphique avec ggplot2
p <- ggplot(migration_long, aes(x = Year, y = Value, color = Country, group = Country, text = paste("Country:", Country, "<br>Year:", Year, "<br>Value:", Value))) +
geom_line() +
theme_minimal() +
labs(title = "Chronological Chart of International migration",
x = "Year",
y = "People") +
scale_x_continuous(breaks = seq(min(migration_long$Year), max(migration_long$Year), by = 1)) + # Forcer l'affichage de toutes les années
theme(legend.position = "none") # Suppression de la légende
#Convertir le graphique ggplot2 en graphique plotly interactif
p_interactive <- ggplotly(p, tooltip = "text")
# Afficher le graphique interactif
p_interactivelibrary(readxl)
library(ggplot2)
library(tidyverse)
library(plotly)
OECD2 <- read_excel("../Rwork/30DayChartChallenge/OECD2.xlsx")
# Renommer la colonne et filtrer les lignes non nulles
migration_data <- rename(OECD2, Country = 1)
migration_data <- filter(migration_data, !is.na(Country))
# Convertir les données en format long
migration_long <- pivot_longer(migration_data, -Country, names_to = "Year", values_to = "Value")
# Convertir les colonnes Year et Value en numériques
migration_long$Year <- as.numeric(migration_long$Year)
migration_long$Value <- as.numeric(gsub(",", "", migration_long$Value))
# Créer le graphique avec ggplot2
p <- ggplot(migration_long, aes(x = Year, y = Value, color = Country, group = Country, text = paste("Country:", Country, "<br>Year:", Year, "<br>Value:", Value))) +
geom_line() +
theme_minimal() +
labs(title = "Chronological Chart of International migration",
x = "Year",
y = "Proportion of migrants on the total population") +
scale_x_continuous(breaks = seq(min(migration_long$Year), max(migration_long$Year), by = 1)) + # Forcer l'affichage de toutes les années
theme(legend.position = "none") # Suppression de la légende
#Convertir le graphique ggplot2 en graphique plotly interactif
p_interactive <- ggplotly(p, tooltip = "text")
# Afficher le graphique interactif
p_interactivelibrary(rworldmap)
library(readr)
# Data settings
wildfires <- read_csv("../Rwork/30DayChartChallenge/wildfires.csv")
wildfires2023_na <-wildfires[wildfires$Year==2023,]
wildfires2023 <- wildfires2023_na[complete.cases(wildfires2023_na), ]
colnames(wildfires2023)[4]<-"AreaBurnt"
wildfires2023$logArea<-log(wildfires2023$AreaBurnt+1)
wildfires23<-wildfires2023[-which(wildfires2023$logArea<0.1),]
colour_fire = c("#FFFF00","#FFD700","#FFA500","#FF8C00","#E66608","#D14209","#C40A0A","#8E0B0B")
# Join wildfire data with spatial data
sPDF <- joinCountryData2Map(wildfires23,
, joinCode = "ISO3"
, nameJoinColumn = "Code")Data from : https://ourworldindata.org/wildfires
library(ggplot2)
library(tidyr)
library(dplyr)
# Data preparation
temperature <- data.frame(
Année = c("2024", "2023", "2022", "2021", "2020", "2019", "2018", "2017", "2016", "2015", "2014"),
Moyenne = c(6.1, 14.4, 14.5, 12.9, 14.1, 13.7, 13.9, 13.4, 13.1, 13.6, 13.8),
Janvier = c(9.6, 6.3, 5.0, 4.9, 7.1, 4.6, 8.4, 3.1, 7.1, 5.6, 7.6),
Février = c(10.6, 6.9, 8.1, 8.2, 9.3, 8.0, 3.5, 8.2, 7.3, 4.9, 7.9),
Mars = c(12.4, 10.2, 9.9, 8.9, 9.4, 10.1, 8.2, 11.0, 8.0, 9.1, 9.8),
Avril = c(15.5, 11.8, 11.8, 10.4, 14.1, 11.7, 13.8, 11.4, 11.1, 12.6, 13.0),
Mai = c(NA, 16.2, 17.8, 13.8, 16.5, 13.9, 16.3, 16.5, 15.0, 15.5, 14.7),
Juin = c(NA, 21.5, 21.2, 20.3, 18.6, 20.1, 20.1, 21.2, 18.7, 19.8, 19.6),
Juillet = c(NA, 21.9, 23.2, 20.7, 21.6, 23.0, 23.2, 21.7, 21.3, 22.8, 20.6),
Août = c(NA, 22.0, 23.7, 20.0, 22.6, 21.8, 22.3, 21.5, 21.5, 21.6, 19.1),
Septembre = c(NA, 21.1, 18.2, 19.3, 19.0, 18.5, 19.0, 16.4, 19.7, 16.4, 18.9),
Octobre = c(NA, 16.4, 17.2, 13.5, 13.0, 15.1, 14.4, 14.9, 12.6, 12.7, 15.9),
Novembre = c(NA, 10.1, 10.9, 7.9, 10.5, 8.8, 9.6, 8.2, 8.8, 11.2, 11.3),
Décembre = c(NA, 8.0, 6.7, 6.9, 6.9, 8.1, 7.7, 5.9, 5.6, 9.5, 6.3)
)
# Reshape data to long format
temperature_long <- temperature %>%
pivot_longer(cols = Janvier:Décembre, names_to = "Month", values_to = "Temperature")
# Ensure Month is a factor with the correct order
temperature_long$Month <- factor(temperature_long$Month, levels = c("Janvier", "Février", "Mars", "Avril", "Mai", "Juin", "Juillet", "Août", "Septembre", "Octobre", "Novembre", "Décembre"))
# Plot the radial histogram
ggplot(temperature_long, aes(x = Month, y = Temperature, group = Année, fill = Année)) +
geom_bar(stat = "identity", position = "dodge", width = 0.8) +
coord_polar() +
theme_minimal() +
labs(title = "Radial Histogram of Monthly Temperatures",
x = "",
y = "Temperature (°C)",
fill = "Year") +
theme(axis.text.x = element_text(size = 8),
axis.title.y = element_text(size = 10))# Load necessary libraries
library(tidyr)
library(dplyr)
library(ggplot2)
# Simulate reading the Excel file
# temp <- read_excel("./Paris.xlsx")
# Since the actual file is not provided, we'll create the dataframe manually based on the provided structure.
temp <- data.frame(
Month = c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", "Year"),
`Record high °C` = c(16.1, 21.4, 26.0, 30.2, 34.8, 37.6, 42.6, 39.5, 36.2, 28.9, 21.6, 17.1, 42.6),
`Daily mean °C` = c(5.4, 6.0, 9.2, 12.2, 15.6, 18.8, 20.9, 20.8, 17.2, 13.2, 8.7, 5.9, 12.8),
`Record low °C` = c(-14.6, -14.7, -9.1, -3.5, -0.1, 3.1, 6.0, 6.3, 1.8, -3.8, -14.0, -23.9, -23.9)
)
# Reshape the data to long format
temp_long <- temp %>%
pivot_longer(cols = -Month, names_to = "Temperature Type", values_to = "Temperature")
# Remove the 'Year' row as it doesn't represent a specific month
temp_long <- temp_long %>%
filter(Month != "Year")
# Convert Month to a factor with the correct order
temp_long$Month <- factor(temp_long$Month, levels = c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"))
# Plot the data
p <- ggplot(data = temp_long, aes(x = Month, y = Temperature, color = `Temperature Type`, group = `Temperature Type`)) +
geom_point() +
geom_smooth(se = FALSE) +
labs(title = "Temperature Trends in Paris",
x = "Month",
y = "Temperature (°C)",
color = "Temperature Type") +
theme_minimal()
print(p)library(ggplot2)
library(plotly)
# Loading data with read_csv()
physical = read_csv("../Rwork/30DayChartChallenge/physical-integrity-rights-index-population-weighted.csv")
# Modify column name to prepare the ploting
colnames(physical)[4] = "index"
# Create plot
plot_physical <- ggplot(data = physical, aes(x = Year, y = index, group = Entity, color = Entity)) +
geom_line() +
labs(title = "Physical integrity rights index",
x = "Year",
y = "Physical violence index",
color = "Country") +
theme_minimal()
#plot_physical
# Convert plot to plotly to make it interactive
plotly_physical <- ggplotly(plot_physical)
# Print the interactive plot
plotly_physical# Load the library
library(ggplot2)
# Create data for each age group
years <- c(2011, 2016, 2022)
age_1_6 <- c(12.29, 14.15, 14.21)
age_7_12 <- c(19.58, 20.27, 24.41)
age_13_19 <- c(30.23, 32.6, 36.08)
# Create a data frame for each age group
screentime_1_6 <- data.frame(
year = years,
duration = age_1_6,
age_group = rep("1-6 years", length(years))
)
screentime_7_12 <- data.frame(
year = years,
duration = age_7_12,
age_group = rep("7-12 years", length(years))
)
screentime_13_19 <- data.frame(
year = years,
duration = age_13_19,
age_group = rep("13-19 years", length(years))
)
# Combine the data frames into one
screentime <- rbind(screentime_1_6, screentime_7_12, screentime_13_19)
# Create the plot using ggplot2
p <- ggplot(screentime, aes(x = year, y = duration, color = age_group, group = age_group)) +
geom_line(size = 1) +
geom_point(size = 2) +
annotate("text", x = screentime$year, y = screentime$duration + 0.5,
label = paste(round(screentime$duration, 2), "h"),
vjust = -0.5, size = 3, color = "black") + # Add annotations just above the points
scale_color_manual(values = c("1-6 years" = "#FDBF6F", "7-12 years" = "#B2DF8A", "13-19 years" = "#A6CEE3")) +
scale_x_continuous(breaks = seq(2012, 2022, by = 1)) +
scale_y_continuous(breaks = seq(0, max(screentime$duration), by = 5)) + # Set y-axis labels every 5 hours
labs(
title = "Evolution of weekly screen time for children and adolescents in France",
x = "Years",
y = "Duration in hours",
color = "Age groups"
) +
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5),
axis.text.x = element_text(angle = 45, hjust = 1)
)
print(p) # Display the plotSource : https://fr.statista.com/statistiques/1414345/duree-utilisation-ecran-france-jeunes/
# Load necessary libraries
library(rworldmap)
library(dplyr)
# Create data frame
data <- data.frame(
country = c("Algeria", "Angola", "Benin", "Botswana", "Burkina Faso", "Burundi",
"Cabo Verde", "Cameroon", "Central African Republic", "Chad", "Comoros",
"Côte d'Ivoire", "Democratic Republic of the Congo", "Djibouti", "Egypt",
"Equatorial Guinea", "Eritrea", "Eswatini", "Ethiopia", "Gabon", "Gambia",
"Ghana", "Guinea", "Guinea-Bissau", "Kenya", "Lesotho", "Liberia", "Libya",
"Madagascar", "Malawi", "Mali", "Mauritania", "Mauritius", "Morocco",
"Mozambique", "Namibia", "Niger", "Nigeria", "Rwanda", "São Tomé and Príncipe",
"Senegal", "Seychelles", "Sierra Leone", "Somalia", "South Africa",
"South Sudan", "Sudan", "Tanzania", "Togo", "Tunisia", "Uganda", "Zambia",
"Zimbabwe"),
criminalisation = c("De facto criminalisation", "No criminalisation", "No criminalisation",
"No criminalisation", "No criminalisation", "Up to 8 years imprisonment",
"No criminalisation", "Up to 8 years imprisonment", "Up to 8 years imprisonment",
"Up to 8 years imprisonment", "Up to 8 years imprisonment", "No criminalisation",
"No criminalisation", "No criminalisation", "De facto criminalisation",
"No criminalisation", "Up to 8 years imprisonment", "No criminalisation",
"Up to 8 years imprisonment", "No criminalisation", "10 years to life in prison",
"Up to 8 years imprisonment", "10 years to life in prison", "No criminalisation",
"Up to 8 years imprisonment", "No criminalisation", "Up to 8 years imprisonment",
"De facto criminalisation", "No criminalisation", "Up to 8 years imprisonment",
"No criminalisation", "Death Penalty", "Up to 8 years imprisonment",
"Up to 8 years imprisonment", "No criminalisation", "No criminalisation",
"No criminalisation", "Death Penalty (possible)", "No criminalisation",
"No criminalisation", "10 years to life in prison", "No criminalisation",
"Up to 8 years imprisonment", "Death Penalty", "No criminalisation",
"De facto criminalisation", "Death Penalty (possible)", "Up to 8 years imprisonment",
"Up to 8 years imprisonment", "Up to 8 years imprisonment",
"Death Penalty (possible)", "Up to 8 years imprisonment",
"Up to 8 years imprisonment")
)
# Convert 'criminalisation' column to factor with specified levels
data$criminalisation <- factor(data$criminalisation, levels = c("No criminalisation",
"De facto criminalisation",
"Up to 8 years imprisonment",
"10 years to life in prison",
"Death Penalty (possible)",
"Death Penalty"))
# Define a vector of colors
colour_criminalisation <- c("white", "#FFECB3", "#FFCC80", "#FF8A65", "#F4511E", "#B71C1C")
# Join the data frame with the map data
sPDF <- joinCountryData2Map(data,
joinCode = "NAME",
nameJoinColumn = "country")# Plot the map
mapCountryData(sPDF,
nameColumnToPlot = 'criminalisation',
colourPalette = colour_criminalisation,
catMethod = "categorical",
mapTitle = "Criminalisation of consensual same-sex sexual acts in Africa (2024)",
xlim = c(-20, 60),
ylim = c(-35, 40),
oceanCol = "#EBF5FB",
lwd = 0.5,
addLegend = FALSE) # Disable default legend
# Manually add a smaller legend on the side
legend("bottomleft", legend = levels(data$criminalisation), fill = colour_criminalisation, bty = "n", cex = 0.8)Inspired by :#https://www.reuters.com/graphics/UGANDA-LGBT/movakykrjva/ and data from : https://database.ilga.org/criminalisation-consensual-same-sex-sexual-acts
library(ggraph)
library(igraph)
first_name = c(NA , "Agnes", "Vincent", "Susan", "Henri", "Anne-Marie", "Serge", "Sonia", "Peter", "Simone", "Andre", "George", "mama Papi", "one Nanie", "two Nanie", "Lia", "Thais", "Olivier",
"Joseph", "Sara", "Lucette", "Me", NA, NA, NA, NA, "Marie-Laure", "Antoine", "Arthur", NA, "Eric", NA, "Johnny", "Camille",
"Albin", "Nina", "Isabelle", NA, "Laurie", "Iris", "Pierrick", NA, "Julie", "Arsene", "Robin", "?", NA, "Laura", "Helios", NA,
"Betty", "Vasya", NA, "Gerard", "Florence", "Sandrine", NA, "Pierre", "Luna", "Nina", "Nathan", NA, "Pere vic et clo", "Victor", "Clotilde", NA, "Alain", "Bastien", "Simon", "Jeanne", "Marie", NA,
"Rebecca", "Mael", "Soan", NA, "Aurelien", "Haldora", "Lilas", NA, "Maman Mamo", "Louis", NA, "Manou",
"Linette", "Annie", NA, "Serge", "Fabienne", "Alain", "Jean-Yves", NA, "Olivier", "Lola", "Jeanne", NA, "Agnes", "Charlotte",
"Elsa", NA, "Martine", "Guillaume", "Melanie", NA, "Basile", "Charlie", "Mae", NA, "Marie", "?", NA,
"Jacques", "Cecile", "Paul", "Jacques", "Mimi (Marie-Blanche)", NA)
nodes<-data.frame(name=1:117)
edges <- data.frame(
from = c(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 2, 5, 4, 3, 28, 29, 27, 5,
4, 31, 33, 34, 35, 36, 37, 18, 28, 39, 40, 41, 29, 43, 44, 45, 46, 19, 48, 49, 19, 51, 52, 21, 54, 55, 56, 55, 58, 59, 60, 61, 56, 63, 64, 65,
20, 67, 68, 69, 70, 71, 73, 74, 75, 69, 70, 77, 79, 78, 81, 82, 10, 84, 82, 86, 85, 85, 88, 89, 90, 91, 89, 93, 94, 95,
90, 97, 98, 99, 91, 101, 102, 103, 102, 109, 110, 103, 105, 106, 107, 112, 113, 114, 115, 8, 116),
to = c(1, 1, 23, 23, 26, 26, 25, 25, 24, 24, 6, 6, 7, 7, 1, 1, 26, 23, 25, 24, 1, 23, 24, 25, 26, 30, 30, 30, 30,
32, 32, 32, 32, 38, 38, 38, 38, 42, 42, 42, 42, 47, 47, 47, 47, 47, 50, 50, 50, 53, 53, 53, 57, 57, 57, 57, 62, 62, 62, 62, 62, 66, 66, 66, 66,
72, 72, 72, 72, 72, 72, 76, 76, 76, 76, 80, 80, 80, 80, 83, 83, 83, 87, 87, 87, 87, 92, 92, 92, 92, 92, 96, 96, 96, 96,
100, 100, 100, 100, 104, 104, 104, 104, 111, 111, 111, 108, 108, 108, 108, 117, 117, 117, 117, 117, 117)
)
height<-c(2, 3, 3, 5, 5, 5, 5, 7, 7, 7, 7, 7, 7, 7, 7, 1, 1, 3, 3, 5,
5, 1, 4, 6, 6, 4, 5, 3, 3, 4, 5, 4, 3, 3, 1, 1, 3, 2, 3, 1, 1, 2,
3, 1, 1, 1, 2, 3, 1, 2, 3, 1, 2, 5, 3, 3, 4, 3, 1, 1, 1, 2, 3, 1, 1, 2, 5, 3, 3, 3, 3, 4, 3, 1, 1, 2, 3, 1, 1, 2,
9, 9, 8, 9, 7, 7, 8, 7, 5, 5, 5, 6, 5, 3, 3, 4, 5, 3, 3, 4, 5, 3, 3, 4, 3, 1, 1, 2, 3, 1, 2, 9, 9, 7, 7, 7, 8)
g <- graph_from_data_frame(edges, vertices = nodes, directed = TRUE)
lext<-create_layout(g,layout='kk')
lext[,2]=height
ggraph(g, layout = lext) +
geom_edge_link() +
geom_node_point() +
geom_node_label(aes(label=first_name), size = 3, alpha = 0.9,
color = "#8856a7",repel=TRUE) +
theme_void()library(visNetwork)
library(igraph)
# Define nodes and edges
first_name = c(NA , "Agnes", "Vincent", "Susan", "Henri", "Anne-Marie", "Serge", "Sonia", "Peter", "Simone", "Andre", "George", "mama Papi", "one Nanie", "two Nanie", "Lia", "Thais", "Olivier",
"Joseph", "Sara", "Lucette", "Me", NA, NA, NA, NA, "Marie-Laure", "Antoine", "Arthur", NA, "Eric", NA, "Johnny", "Camille",
"Albin", "Nina", "Isabelle", NA, "Laurie", "Iris", "Pierrick", NA, "Julie", "Arsene", "Robin", "?", NA, "Laura", "Helios", NA,
"Betty", "Vasya", NA, "Gerard", "Florence", "Sandrine", NA, "Pierre", "Luna", "Nina", "Nathan", NA, "Pere vic et clo", "Victor", "Clotilde", NA, "Alain", "Bastien", "Simon", "Jeanne", "Marie", NA,
"Rebecca", "Mael", "Soan", NA, "Aurelien", "Haldora", "Lilas", NA, "Maman Mamo", "Louis", NA, "Manou",
"Linette", "Annie", NA, "Serge", "Fabienne", "Alain", "Jean-Yves", NA, "Olivier", "Lola", "Jeanne", NA, "Agnes", "Charlotte",
"Elsa", NA, "Martine", "Guillaume", "Melanie", NA, "Basile", "Charlie", "Mae", NA, "Marie", "?", NA,
"Jacques", "Cecile", "Paul", "Jacques", "Mimi (Marie-Blanche)", NA)
nodes <- data.frame(id = 1:117, label = first_name)
edges <- data.frame(
from = c(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 2, 5, 4, 3, 28, 29, 27, 5,
4, 31, 33, 34, 35, 36, 37, 18, 28, 39, 40, 41, 29, 43, 44, 45, 46, 19, 48, 49, 19, 51, 52, 21, 54, 55, 56, 55, 58, 59, 60, 61, 56, 63, 64, 65,
20, 67, 68, 69, 70, 71, 73, 74, 75, 69, 70, 77, 79, 78, 81, 82, 10, 84, 82, 86, 85, 85, 88, 89, 90, 91, 89, 93, 94, 95,
90, 97, 98, 99, 91, 101, 102, 103, 102, 109, 110, 103, 105, 106, 107, 112, 113, 114, 115, 8, 116),
to = c(1, 1, 23, 23, 26, 26, 25, 25, 24, 24, 6, 6, 7, 7, 1, 1, 26, 23, 25, 24, 1, 23, 24, 25, 26, 30, 30, 30, 30,
32, 32, 32, 32, 38, 38, 38, 38, 42, 42, 42, 42, 47, 47, 47, 47, 47, 50, 50, 50, 53, 53, 53, 57, 57, 57, 57, 62, 62, 62, 62, 62, 66, 66, 66, 66,
72, 72, 72, 72, 72, 72, 76, 76, 76, 76, 80, 80, 80, 80, 83, 83, 83, 87, 87, 87, 87, 92, 92, 92, 92, 92, 96, 96, 96, 96,
100, 100, 100, 100, 104, 104, 104, 104, 111, 111, 111, 108, 108, 108, 108, 117, 117, 117, 117, 117, 117)
)
# Create the interactive network graph
visNetwork(nodes, edges) %>%
visOptions(highlightNearest = TRUE, nodesIdSelection = TRUE) %>%
visPhysics(stabilization = FALSE)library(geosphere)
library(ggplot2)
library(reshape2)
# Define the coordinates (latitude and longitude) for each capital
capitals <- data.frame(
city = c("London", "Paris", "Berlin", "Madrid", "Rome", "Vienna", "Amsterdam",
"Brussels", "Warsaw", "Prague", "Lisbon", "Athens", "Budapest",
"Stockholm", "Copenhagen", "Dublin"),
lat = c(51.5074, 48.8566, 52.5200, 40.4168, 41.9028, 48.2082, 52.3676,
50.8503, 52.2297, 50.0755, 38.7167, 37.9838, 47.4979, 59.3293,
55.6761, 53.3498),
lon = c(-0.1278, 2.3522, 13.4050, -3.7038, 12.4964, 16.3738, 4.9041,
4.3517, 21.0122, 14.4378, -9.1393, 23.7275, 19.0402, 18.0686,
12.5683, -6.2603)
)
# Calculate the distance matrix
distance_matrix <- distm(capitals[, c("lon", "lat")], fun = distHaversine) / 1000 # Convert to kilometers
# Convert distance matrix to a data frame for plotting
distance_df <- as.data.frame(as.table(as.matrix(distance_matrix)))
names(distance_df) <- c("Capital1", "Capital2", "Distance")
distance_df$Capital1 <- factor(distance_df$Capital1, labels=capitals$city)
distance_df$Capital2 <- factor(distance_df$Capital2, labels=capitals$city)
# Plot the heatmap
heatmap = ggplot(distance_df, aes(Capital1, Capital2, fill = Distance)) +
geom_tile(color = "black") +
scale_fill_gradientn(colors = c("blue", "white", "yellow", "orange", "red")) +
theme_minimal() +
labs(title = "Distance Between Major European Capitals (in km)",
x = "", y = "", fill = "Distance (km)") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
heatmapAdapting graphs for colour blinds
library(geosphere)
library(ggplot2)
library(reshape2)
# Define the coordinates (latitude and longitude) for each capital
capitals <- data.frame(
city = c("London", "Paris", "Berlin", "Madrid", "Rome", "Vienna", "Amsterdam",
"Brussels", "Warsaw", "Prague", "Lisbon", "Athens", "Budapest",
"Stockholm", "Copenhagen", "Dublin"),
lat = c(51.5074, 48.8566, 52.5200, 40.4168, 41.9028, 48.2082, 52.3676,
50.8503, 52.2297, 50.0755, 38.7167, 37.9838, 47.4979, 59.3293,
55.6761, 53.3498),
lon = c(-0.1278, 2.3522, 13.4050, -3.7038, 12.4964, 16.3738, 4.9041,
4.3517, 21.0122, 14.4378, -9.1393, 23.7275, 19.0402, 18.0686,
12.5683, -6.2603)
)
# Calculate the distance matrix
distance_matrix <- distm(capitals[, c("lon", "lat")], fun = distHaversine) / 1000 # Convert to kilometers
# Convert distance matrix to a data frame for plotting
distance_df <- as.data.frame(as.table(as.matrix(distance_matrix)))
names(distance_df) <- c("Capital1", "Capital2", "Distance")
distance_df$Capital1 <- factor(distance_df$Capital1, labels=capitals$city)
distance_df$Capital2 <- factor(distance_df$Capital2, labels=capitals$city)
# Plot the heatmap
heatmap_viridis = ggplot(distance_df, aes(Capital1, Capital2, fill = Distance)) +
geom_tile(color = "black") +
scale_fill_viridis() +
theme_minimal() +
labs(title = "Distance Between Major European Capitals (in km)",
x = "", y = "", fill = "Distance (km)") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
heatmap_viridisAviation data from 1990 : https://ourworldindata.org/global-aviation-emissions
# Load required libraries
library(ggplot2)
library(readr)
library(patchwork)
# Read the data
aviation = read.csv("../Rwork/30DayChartChallenge/aviation.csv")
# Line plot for Passenger Demand
a = ggplot(aviation, aes(x = Year, y = `Passenger.demand`, group = 1)) +
geom_line() +
labs(title = "Passenger Demand Over Time", x = "Year", y = "Passenger Demand") +
theme_minimal()
# Bar plot of Freight Demand Over Time
b = ggplot(aviation, aes(x = Year, y = `Freight.demand`)) +
geom_bar(stat = "identity", fill = "steelblue") +
labs(title = "Freight Demand Over Time (million-ton km)", x = "Year", y = "Freight Demand") +
theme_minimal()
#Scatter Plot of Carbon Intensity vs. Year
c <- ggplot(aviation, aes(x = Year, y = `Carbon.intensity`)) +
geom_point(aes(color = `Carbon.per.unit.energy`), size = 2) +
labs(title = "Scatter Plot of Carbon Intensity vs. Year", x = "Year", y = "Carbon Intensity (gCO₂ per passenger-km)") +
scale_color_gradient(low = "lightblue", high = "darkblue") +
theme_minimal()
# Line plot for CO2 Emissions
d = ggplot(aviation, aes(x = Year, y = `CO2.emissions`)) +
geom_line(color = "black") +
labs(title = "CO2 Emissions Over Time", x = "Year", y = "CO2 Emissions") +
theme_minimal()
a + b + c + dTwo ways of viewing the same data :
library(readr)
library(ggplot2)
library(patchwork)
disaster = read_csv("../Rwork/30DayChartChallenge/natural-disasters-by-type.csv")
plot_disaster <- ggplot(data = disaster, aes(x = Year, y = Disasters, group = Entity, color = Entity)) +
geom_line() +
labs(title = "Number of recorded natural disaster events, 1900 to 2024",
x = "Year",
y = "",
color = "Disasters") +
theme_minimal() + theme(legend.position = "left")
plot_disaster1 = ggplotly(plot_disaster)
#stacked bar plot
disasterr = disaster[-(1:165),]
pastel_colors <- c("#FBB4AE", "#E5D8BD", "#CCEBC5", "#DECBE4", "#FED9A6", "#B3CDE3", "#B3E2CD", "#FDCDAC", "#CBD5E8", "#F4CAE4")
plot_disasterr <- ggplot(data = disasterr, aes(x = Year, y = Disasters, group = Entity, fill = Entity)) +
geom_bar(stat = "identity", position = "stack") +
scale_fill_manual(values = pastel_colors )
labs(x = "Year", y = "") +
theme_minimal()Network graph using the distance between cities.
#Make the graph with k=3 nearest neighbors
library(ggraph)
library(igraph)
library(tidyverse)
library(tidygraph)
# install.packages("FastKNN")
mateuro<-as.matrix(eurodist)
n=ncol(mateuro)
library(FastKNN)
k=3
nn = matrix(0,n,k) # n x k
for (i in 1:n)
nn[i,] = k.nearest.neighbors(i, mateuro, k = k)
nngraph<-matrix(0,ncol=n,nrow=n)
for (i in 1:n){
nngraph[i,nn[i,]]=1
}
ig<-graph_from_adjacency_matrix(nngraph,mode = "undirected")
V(ig)$name<-rownames(mateuro)
coords<-cmdscale(eurodist,k=2)
layg<-graph_from_adjacency_matrix(mateuro,weighted=TRUE)
graph1 <- as_tbl_graph(ig)
ggraph(graph1,layout=coords) +
geom_edge_link(aes(colour = "red")) +
geom_node_point()+
geom_node_label(aes(label=rownames(mateuro)),repel=TRUE) +
labs(title = "Graph of the three nearest neighbours") +
theme_void() +
theme(legend.position = "none") # Load libraries
library(ggplot2)
# Prepare the data
aus<-readRDS("../Rwork/30DayChartChallenge/Aussie2.rds")
aust=t(aus)
aust=as.data.frame(aust)
#Create the plot
Cloth_Alcohol_correlation = ggplot(aust,aes(x=Alcohol_Tobacco_drugs,y=Clothing_footwear))+
geom_point(aes(col=Year), size = 2) +
scale_colour_gradient(low = "lightblue", high = "darkgreen") +
labs(title = "Price of clothing and alcohol in Australia", x = "Alcohol, Tabacco and drugs", y = "Clothing and footwear") +
geom_label(aes(x = 115, y = 102, label = "Negative correlation = -0.84"), color = "black", fill = "white", size = 3.5, hjust = 0) +
theme_bw()
Cloth_Alcohol_correlationFrom the ADB Bank : https://data.adb.org/dataset/australia-key-indicators
Reference to Anscombe’s quartet that shows the importance of looking at the scatter plot graph, and not only the statistics like correlation or mean !
# Load the libraries
library(datasauRus)
library(ggplot2)
library(dplyr)
# Define color palette
colors <- c("dino" = "#FF9999", "bullseye" = "#FFCC99", "star" = "#FFFF99", "slant_up" = "#99FF99")
# Filter the datasets from the datasaurus_dozen
datasets <- datasaurus_dozen %>%
filter(dataset %in% c("dino", "bullseye", "star", "slant_up"))
# Create the plots
ggplot(datasets, aes(x = x, y = y, color = dataset)) +
geom_point() +
scale_color_manual(values = colors) +
facet_wrap(~ dataset) +
theme_minimal() +
labs(title = "Datasaurus Dozen",
x = "X",
y = "Y") +
theme(legend.position = "none", plot.title = element_text(hjust = 0.5))Zooming in the correlation of Data from ABD Day 18 in Australia: